******************************************************************************************
* ONLINE APPENDIX FIGURE A12, PANEL A
* 
* 1) ESTIMATE THE EFFECT OF THE VENEZUELAN IMMIGRATION ON WAGES, EMPLOYMENT, 
* UNEMPLOYMENT, AND LABOR FORCE PARTIPATION ON NATIVES UISNG CONFIDENCE SETS ROBUST 
* TO WEAK INSTRUMENTS
* 
* LAST MODIFIED: JULY, 2025
******************************************************************************************

clear all 

* INSTALL SCHEMES FOR GRAPHS
*ssc install blindschemes, replace all 
*ssc install schemepack, replace /*based on https://github.com/asjadnaqvi/Stata-schemes*/

**# DIRECTORIES

* PAPER DIRECTORY DROPBOX
global fiscal "/Users/andres/Library/CloudStorage/Dropbox/2 Papers/2023/Paper_Fiscal_Costs"

* SUBDIRECTORIES
global data "$fiscal/1 Data/12 Final Datasets"
global programs "$fiscal/2 Programs/2 Figures and Tables paper"
global results "$fiscal/3 Results"

**# LOAD DATASET AND CREATE NEW VARIABLES

* LOAD DATASET WITH VARIABLES NEEDED
use "$data/LaborEffects.dta", clear

**#SAMPLE SELECTION 

* ONLY METROPOLITAN AREAS 
drop if area >= 81 /* not observations for these areas*/
drop if area == 25 /* not observations */
keep if area !=. 

* DUMMIES FOR YEAR AND MSA
tab year, gen(year)
tab area, gen(area)

* KEEP ONLY WORKING-AGE POPULATION
drop if (age<15|age>64) 

* KEEP IF NOT ENROLLED IN SCHOOL
*keep if att_school == 2 

* KEEP THOSE THAT WORK LESS THAN 100 HOURS A WEEK  
*drop if weekly_hours>100

* DELIMIT THE ANALYSIS FOR THE YEARS 2013-2018. 
drop if year > 2018

* DISPLAY FORMAT FOR SAMPLING WEIGHTS 
format %12.0f fex12

* GENERATE ROUNDED WEIGHTS FOR SPECIFIC COMMANDS THAT REQUIRE THEM 
gen fex12_round = fex12
replace fex12_round = round(fex12_round) 

**# GLOBAL MACROS FOR VARIABLES
global controls year2-year6 area2-area23 i.sex1 age c.age#c.age educ2-educ5
global x        mig_share2013
global z1       IV2005
global z2       IVdist
global w        fex12

**# ESTIMATION RESULTS

**## 1) EFFECT OF THE VENEZUELAN IMMIGRATION ON NATIVES' UNEMPLOYMENT

matrix A = J(3,5,.) /* create matrix to save coefficients and confidence intervals*/
matrix colnames A = b ll ul ll_AR ul_AR 
matrix rownames A = OLS IV1 IV2 

**### OLS
ivreg2 unemp $x $controls if labforce == 1 & area != . & immig == 0, cluster(area)  partial(area2-area23) small 
matrix A[1,1] = _b[$x]
matrix A[1,2] = _b[$x] - invttail(e(df_r),0.025)*_se[$x]
matrix A[1,3] = _b[$x] + invttail(e(df_r),0.025)*_se[$x]
matrix A[1,4] = _b[$x] - invttail(e(df_r),0.025)*_se[$x]
matrix A[1,5] = _b[$x] + invttail(e(df_r),0.025)*_se[$x]

**### IV1 (ENCLAVE INSTRUMENT)
ivreg2 unemp ($x = $z1) $controls if labforce == 1 & immig == 0 & area != . , first cluster(area) partial(area2-area23) small 
matrix A[2,1] = _b[$x]
matrix A[2,2] = _b[$x ] - invttail(e(df_r),0.025)*_se[$x]
matrix A[2,3] = _b[$x ] + invttail(e(df_r),0.025)*_se[$x]

**#### ANDERSON-RUBIN CI
twostepweakiv 2sls unemp ($x = $z1) $controls  if labforce == 1 & immig == 0 & area != . , cluster(area) partial(area2-area23) small
estadd local arci = e(ar_cset)	// Anderson-Rubin CI
di e(arci)
gen iv1_ci = e(arci) 	// saves it as a varibale in data
split iv1_ci, parse(,)	// split at ,
replace iv1_ci1 = subinstr(iv1_ci1, "[", "", .) // removes [
replace iv1_ci2 = subinstr(iv1_ci2, "]", "", .) // removes ]
destring iv1_ci1 iv1_ci2, replace
sum iv1_ci1
return list
matrix A[2,4] = r(mean)	// Low-CI
sum iv1_ci2
return list
matrix A[2,5] = r(mean)	// High-CI

**### IV2 (DISTANCE INSTRUMENT)
ivreg2 unemp ($x = $z2) $controls if labforce == 1 & immig == 0 & area != . , first cluster(area) partial(area2-area23) small 
matrix A[3,1] = _b[$x]
matrix A[3,2] = _b[$x] - invttail(e(df_r),0.025)*_se[$x]
matrix A[3,3] = _b[$x] + invttail(e(df_r),0.025)*_se[$x]
**####ANDERSON-RUBIN CI
twostepweakiv 2sls unemp ($x = $z2) $controls  if labforce == 1 & immig == 0 & area != . , cluster(area) partial(area2-area23) small
estadd local arci = e(ar_cset)	// Anderson-Rubin CI
di e(arci)
gen iv2_ci = e(arci) 	// saves it as a varibale in data
split iv2_ci, parse(,)	// split at ,
replace iv2_ci1 = subinstr(iv2_ci1, "[", "", .) // removes [
replace iv2_ci2 = subinstr(iv2_ci2, "]", "", .) // removes ]
destring iv2_ci1 iv2_ci2, replace
sum iv2_ci1
return list
matrix A[3,4] = r(mean)	// Low-CI
sum iv2_ci2
return list
matrix A[3,5] = r(mean)	// High-CI

matrix list A
drop iv1_ci-iv2_ci2


**## 2) EFFECT OF THE VENEZUELAN IMMIGRATION ON NATIVES' EMPLOYMENT

matrix B = J(3,5,.)
matrix colnames B = b ll ul ll_AR ul_AR 
matrix rownames B = OLS IV1 IV2 

**### OLS
ivreg2 emp $x $controls if working_age == 1 & area != . & immig == 0 , cluster(area)  partial(area2-area23) small 
matrix B[1,1] = _b[$x]
matrix B[1,2] = _b[$x] - invttail(e(df_r),0.025)*_se[$x]
matrix B[1,3] = _b[$x] + invttail(e(df_r),0.025)*_se[$x]
matrix B[1,4] = _b[$x] - invttail(e(df_r),0.025)*_se[$x]
matrix B[1,5] = _b[$x] + invttail(e(df_r),0.025)*_se[$x]

**### IV1 (ENCLAVE INSTRUMENT)
ivreg2 emp ($x = $z1) $controls if working_age == 1 & immig == 0 & area != . , first cluster(area) partial(area2-area23) small 
matrix B[2,1] = _b[$x]
matrix B[2,2] = _b[$x ] - invttail(e(df_r),0.025)*_se[$x]
matrix B[2,3] = _b[$x ] + invttail(e(df_r),0.025)*_se[$x]
**#### ANDERSON-RUBIN CI
twostepweakiv 2sls emp ($x = $z1) $controls  if working_age == 1 & immig == 0 & area != . , cluster(area) partial(area2-area23) small
estadd local arci = e(ar_cset)	// Anderson-Rubin CI
di e(arci)
gen iv1_ci = e(arci) 	// saves it as a varibale in data
split iv1_ci, parse(,)	// split at ,
replace iv1_ci1 = subinstr(iv1_ci1, "[", "", .) // removes [
replace iv1_ci2 = subinstr(iv1_ci2, "]", "", .) // removes ]
destring iv1_ci1 iv1_ci2, replace
sum iv1_ci1
return list
matrix B[2,4] = r(mean)	// Low-CI
sum iv1_ci2
return list
matrix B[2,5] = r(mean)	// High-CI

**### IV2 (DISTANCE INSTRUMENT)
ivreg2 emp ($x = $z2) $controls if working_age == 1 & immig == 0 & area != . , first cluster(area) partial(area2-area23) small 
matrix B[3,1] = _b[$x]
matrix B[3,2] = _b[$x ] - invttail(e(df_r),0.025)*_se[$x]
matrix B[3,3] = _b[$x ] + invttail(e(df_r),0.025)*_se[$x]
**#### ANDERSON-RUBIN CI
twostepweakiv 2sls emp ($x = $z2) $controls  if working_age == 1 & immig == 0 & area != . , cluster(area) partial(area2-area23) small
estadd local arci = e(ar_cset)	// Anderson-Rubin CI
di e(arci)
gen iv2_ci = e(arci) 	// saves it as a varibale in data
split iv2_ci, parse(,)	// split at ,
replace iv2_ci1 = subinstr(iv2_ci1, "[", "", .) // removes [
replace iv2_ci2 = subinstr(iv2_ci2, "]", "", .) // removes ]
destring iv2_ci1 iv2_ci2, replace
sum iv2_ci1
return list
matrix B[3,4] = r(mean)	// Low-CI
sum iv2_ci2
return list
matrix B[3,5] = r(mean)	// High-CI

matrix list B
drop iv1_ci-iv2_ci2

**##  3) EFFECT OF THE VENEZUELAN IMMIGRATION ON NATIVES' WAGES 
* NOTE: USUALLY THE SAMPLE IS RESTRICTED TO NATIVES IN MSAs WHO ARE 
* NOT ATTENDING SCHOOL AND NOT SELF-EMPLOYED. HOWEVER, MOST THE 
* PAPERS EXPLORING SIMILAR TOPICS FOR COLOMBIA DON'T DO SO.  

preserve
**### SAMPLE CONSIDERATIONS WAGE ESTIMATIONS
* KEEP IF NOT SELF-EMPLOYED 
*keep if p6430 != 4

* KEEP ONLY SALARIED WORKERS
*keep if p6430 == 1

* KEEP THOSE THAT WORK LESS THAN 100 HOURS A WEEK  
*drop if p6800>100

* HANDLING OUTLIERS
levelsof year, local(year)
foreach j of local year {
display "this is year `j'"
centile hourly_real_wages if year == `j', cent(0.5 99.5)
return list 
local cent1 = r(c_1)
display `cent1'
local cent99 = r(c_2)
display `cent99'
* keep only de wage distribution between 1 and 99
drop if hourly_real_wages > `cent99'  
drop if hourly_real_wages < `cent1' 
}

matrix C = J(3,5,.)
matrix colnames C = b ll ul ll_AR ul_AR 
matrix rownames C = OLS IV1 IV2 

**### OLS
ivreg2 log_hourly_real_wages $x $controls if labforce == 1 & immig == 0 & area != . , cluster(area)  partial(area2-area23) small 
matrix C[1,1] = _b[$x]
matrix C[1,2] = _b[$x] - invttail(e(df_r),0.025)*_se[$x]
matrix C[1,3] = _b[$x] + invttail(e(df_r),0.025)*_se[$x]
matrix C[1,4] = _b[$x] - invttail(e(df_r),0.025)*_se[$x]
matrix C[1,5] = _b[$x] + invttail(e(df_r),0.025)*_se[$x]

**### IV1 (ENCLAVE INSTRUMENT)
ivreg2 log_hourly_real_wages ($x = $z1) $controls if labforce == 1 & immig == 0 & area != .  , first cluster(area)  partial(area2-area23) small 
matrix C[2,1] = _b[$x]
matrix C[2,2] = _b[$x ] - invttail(e(df_r),0.025)*_se[$x]
matrix C[2,3] = _b[$x ] + invttail(e(df_r),0.025)*_se[$x]

**#### ANDERSON-RUBIN CI
twostepweakiv 2sls log_hourly_real_wages ($x = $z1) $controls  if labforce == 1 & immig == 0 & area != . , cluster(area) partial(area2-area23) small
estadd local arci = e(ar_cset)	// Anderson-Rubin CI
di e(arci)
gen iv1_ci = e(arci) 	// saves it as a varibale in data
split iv1_ci, parse(,)	// split at ,
replace iv1_ci1 = subinstr(iv1_ci1, "[", "", .) // removes [
replace iv1_ci2 = subinstr(iv1_ci2, "]", "", .) // removes ]
destring iv1_ci1 iv1_ci2, replace
sum iv1_ci1
return list
matrix C[2,4] = r(mean)	// Low-CI
sum iv1_ci2
return list
matrix C[2,5] = r(mean)	// High-CI

**### IV2 (DISTANCE INSTRUMENT)
ivreg2 log_hourly_real_wages ($x = $z2) $controls if labforce == 1 & immig == 0 & area != .  , first cluster(area)  partial(area2-area23) small 
matrix C[3,1] = _b[$x]
matrix C[3,2] = _b[$x ] - invttail(e(df_r),0.025)*_se[$x]
matrix C[3,3] = _b[$x ] + invttail(e(df_r),0.025)*_se[$x]

**#### ANDERSON-RUBIN CI
twostepweakiv 2sls log_hourly_real_wages ($x = $z2) $controls  if labforce == 1 & immig == 0 & area != . , cluster(area) partial(area2-area23) small
estadd local arci = e(ar_cset)	// Anderson-Rubin CI
di e(arci)
gen iv2_ci = e(arci) 	// saves it as a varibale in data
split iv2_ci, parse(,)	// split at ,
replace iv2_ci1 = subinstr(iv2_ci1, "[", "", .) // removes [
replace iv2_ci2 = subinstr(iv2_ci2, "]", "", .) // removes ]
destring iv2_ci1 iv2_ci2, replace
sum iv2_ci1
return list
matrix C[3,4] = r(mean)	// Low-CI
sum iv2_ci2
return list
matrix C[3,5] = r(mean)	// High-CI

matrix list C
drop iv1_ci-iv2_ci2

restore /*restore sample before considering wage outliers*/


**## 4: EFFECT OF THE VENEZUELAN IMMIGRATION ON NATIVES' LABOR FORCE PARTICIPATION

matrix D = J(3,5,.)
matrix colnames D = b ll ul ll_AR ul_AR 
matrix rownames D = OLS IV1 IV2 

**### OLS
ivreg2 labforce $x $controls if immig == 0 & area != .  , cluster(area)  partial(area2-area23) small 
matrix D[1,1] = _b[$x]
matrix D[1,2] = _b[$x] - invttail(e(df_r),0.025)*_se[$x]
matrix D[1,3] = _b[$x] + invttail(e(df_r),0.025)*_se[$x]
matrix D[1,4] = _b[$x] - invttail(e(df_r),0.025)*_se[$x] /* same as 1,2 to facilitate graphs*/
matrix D[1,5] = _b[$x] + invttail(e(df_r),0.025)*_se[$x] /* same as 1,2 to facilitate graphs*/

**### IV1 (ENCLAVE INSTRUMENT)
ivreg2 labforce ($x = $z1) $controls if immig == 0 & area != .  , first cluster(area) partial(area2-area23) small 
matrix D[2,1] = _b[$x]
matrix D[2,2] = _b[$x ] - invttail(e(df_r),0.025)*_se[$x]
matrix D[2,3] = _b[$x ] + invttail(e(df_r),0.025)*_se[$x]

**#### ANDERSON-RUBIN CI
twostepweakiv 2sls  labforce ($x = $z1) $controls  if immig == 0 & area != .  , cluster(area) partial(area2-area23) small
estadd local arci = e(ar_cset)	// Anderson-Rubin CI
di e(arci)
gen iv1_ci = e(arci) 	// saves it as a varibale in data
split iv1_ci, parse(,)	// split at ,
replace iv1_ci1 = subinstr(iv1_ci1, "[", "", .) // removes [
replace iv1_ci2 = subinstr(iv1_ci2, "]", "", .) // removes ]
destring iv1_ci1 iv1_ci2, replace
sum iv1_ci1
return list
matrix D[2,4] = r(mean)	// Low-CI
sum iv1_ci2
return list
matrix D[2,5] = r(mean)	// High-CI

**### IV2 (DISTANCE INSTRUMENT)
ivreg2  labforce ($x = $z2) $controls if immig == 0 & area != . , first cluster(area) partial(area2-area23) small 
matrix D[3,1] = _b[$x]
matrix D[3,2] = _b[$x ] - invttail(e(df_r),0.025)*_se[$x]
matrix D[3,3] = _b[$x ] + invttail(e(df_r),0.025)*_se[$x]

**#### ANDERSON-RUBIN CI
twostepweakiv 2sls  labforce ($x = $z2) $controls  if immig == 0 & area != . , cluster(area) partial(area2-area23) small
estadd local arci = e(ar_cset)	// Anderson-Rubin CI
di e(arci)
gen iv2_ci = e(arci) 	// saves it as a varibale in data
split iv2_ci, parse(,)	// split at ,
replace iv2_ci1 = subinstr(iv2_ci1, "[", "", .) // removes [
replace iv2_ci2 = subinstr(iv2_ci2, "]", "", .) // removes ]
destring iv2_ci1 iv2_ci2, replace
sum iv2_ci1
return list
matrix D[3,4] = r(mean)	// Low-CI
sum iv2_ci2
return list
matrix D[3,5] = r(mean)	// High-CI

matrix list D
drop iv1_ci-iv2_ci2


**# PLOTTING EFFECTS

**## WAGES

*  T-BASED CIs
coefplot (matrix(C[,1]), ci((C[,2] C[,3])) ///
ciopts(recast(rcap))), ///
yline(0) vertical ///
coeflabel(1 r1 = "OLS" ///
		  2 r2 = "IV: enclave" ///
		  3 r3 = "IV: distance" , labsize(small)) ///
ytitle("Effect on log hourly wages", size(medsmall))  ///
graphregion(color(white)) scheme(plottig) ///
		title("A. Wages", size(medium))
		 tempfile results1
		 graph save "`results1'"

* AR CIs
coefplot (matrix(C[,1]), ci((C[,4] C[,5])) ///
ciopts(recast(rcap))), ///
yline(0) vertical ///
coeflabel(1 r1 = "OLS" ///
		  2 r2 = "IV: enclave" ///
		  3 r3 = "IV: distance" , labsize(small)) ///
ytitle("Effect on log hourly wages", size(medsmall))  ///
graphregion(color(white)) scheme(plottig) ///
		title("A. Wages", size(medium))
		 tempfile results1B
		 graph save "`results1B'"

		 
**# UNEMPLOYMENT 
		 
* T-BASED CIs
coefplot (matrix(A[,1]), ci((A[,2] A[,3])) ///
ciopts(recast(rcap))), ///
yline(0) vertical ///
coeflabel(1 r1 = "OLS" ///
		  2 r2 = "IV: enclave" ///
		  3 r3 = "IV: distance" , labsize(small)) ///
ytitle("Effect on P(unemployment = 1)", size(medsmall))  ///
graphregion(color(white)) scheme(plottig) ///
title("B. Unemployment", size(medium))
tempfile results2
graph save "`results2'"

* AR CIs
coefplot (matrix(A[,1]), ci((A[,4] A[,5])) ///
ciopts(recast(rcap))), ///
yline(0) vertical ///
coeflabel(1 r1 = "OLS" ///
		  2 r2 = "IV: enclave" ///
		  3 r3 = "IV: distance" , labsize(small)) ///
ytitle("Effect on P(unemployment = 1)", size(medsmall))  ///
graphregion(color(white)) scheme(plottig) ///
title("B. Unemployment", size(medium))
tempfile results2B
graph save "`results2B'"


**# EMPLOYMENT

* T-BASED CIs
coefplot (matrix(B[,1]), ci((B[,2] B[,3])) ///
ciopts(recast(rcap))), ///
yline(0) vertical ///
coeflabel(1 r1 = "OLS" ///
		  2 r2 = "IV: enclave" ///
		  3 r3 = "IV: distance" , labsize(small)) ///
ytitle("Effect on P(employment = 1)", size(medsmall))  ///
graphregion(color(white)) scheme(plottig) ///
		title("C. Employment", size(medium))
		 tempfile results3
		 graph save "`results3'"		 

* AR CIs
coefplot (matrix(B[,1]), ci((B[,4] B[,5])) ///
ciopts(recast(rcap))), ///
yline(0) vertical ///
coeflabel(1 r1 = "OLS" ///
		  2 r2 = "IV: enclave" ///
		  3 r3 = "IV: distance" , labsize(small)) ///
ytitle("Effect on P(employment = 1)", size(medsmall))  ///
graphregion(color(white)) scheme(plottig) ///
		title("C. Employment", size(medium))
		 tempfile results3B
		 graph save "`results3B'"

		 
**# LABOR FORCE PARTICIPATION

* T-BASED CIs
coefplot (matrix(D[,1]), ci((D[,2] D[,3])) ///
ciopts(recast(rcap))), ///
yline(0) vertical ///
coeflabel(1 r1 = "OLS" ///
		  2 r2 = "IV: enclave" ///
		  3 r3 = "IV: distance" , labsize(small)) ///
ytitle("Effect on P(labor force = 1)", size(medsmall))  ///
graphregion(color(white)) scheme(plottig) ///
		title("D. Labor force", size(medium))
		 tempfile results4
		 graph save "`results4'"

* AR CIs
coefplot (matrix(D[,1]), ci((D[,4] D[,5])) ///
ciopts(recast(rcap))), ///
yline(0) vertical ///
coeflabel(1 r1 = "OLS" ///
		  2 r2 = "IV: enclave" ///
		  3 r3 = "IV: distance" , labsize(small)) ///
ytitle("Effect on P(labor force = 1)", size(medsmall))  ///
graphregion(color(white)) scheme(plottig) ///
		title("D. Labor force", size(medium))
		 tempfile results4B
		 graph save "`results4B'"


		 
**# FIGURE 4 PAPER: LABOR MARKET EFFECTS (T-BASED CIs)
*graph combine "`results1'" "`results2'" "`results3'" "`results4'" ///
*, graphregion(color(white)) cols(2) rows(2) xcommon  iscale(0.72) scheme(plottig)
*qui graph save "$results/1 Figures Paper/Fig4.gph", replace 
*qui graph export "$results/1 Figures Paper/Fig4.eps", as(eps) replace


**# ONLINE APPENDIX A12, PANEL
* LABOR MARKET EFFECTS NO INCLUDED IN THE PAPER (AR-BASED CIs ROBUST TO WEAK INSTRUMENTS)
graph combine "`results1B'" "`results2B'" "`results3B'" "`results4B'" ///
, graphregion(color(white)) cols(2) rows(2) xcommon  iscale(0.72) scheme(plottig)
qui graph save "$results/1 Figures Paper/FigA12_A.gph", replace 
qui graph export "$results/1 Figures Paper/FigA12_A.eps", as(eps) replace		 


